#################################################################
#Code to scrape bills from govinfo.gov
#
#Last update: April 21, 2023

#Authors: Karen Simpson and Jeremy Gelman
#################################################################


# ---- Configuration ----

# Directory that holds the input list of bills (fullbill_list.csv).
input_dir <- "/replication/"
# Directory in which the scraped bill text files are looked up and stored.
output_dir <- "/replication/bills"

# Date of scraping; used as the prefix of the results file name.
date <- "03_15-"

# govinfo API key (each user needs their own). Prefer setting the
# GOVINFO_API_KEY environment variable so the key never has to be saved in
# this file; if the variable is unset, replace the placeholder text below.
key <- Sys.getenv("GOVINFO_API_KEY", unset = "Insert unique API key")

library(httr)
library(stringr)

#Cue up list

# Build the work list `df`: every bill in fullbill_list.csv that does not yet
# have a matching "<id>.txt" file in output_dir, so an interrupted scrape can
# be resumed without downloading finished bills again.
#
# Paths are built explicitly rather than via setwd(), so the working directory
# is left untouched and a missing output_dir simply means "nothing done yet".

# Only .txt files count as finished bills; other files are ignored.
done_files <- list.files(output_dir, pattern = "\\.txt$")

df <- read.csv(file.path(input_dir, "fullbill_list.csv"))
# Presumably the row-name column left behind by an earlier write.csv().
df$X <- NULL

# `a` holds the bill/package identifier used in the API URL and file name.
stopifnot("a" %in% names(df))
# Keep the id column first, as the original merge-based code did.
df <- df[, c("a", setdiff(names(df), "a")), drop = FALSE]

if (length(done_files) > 0) {
  # Strip only a trailing literal ".txt" (the dot is escaped and anchored).
  done_ids <- sub("\\.txt$", "", done_files)
  df <- df[!(df$a %in% done_ids), , drop = FALSE]
}

# Status column filled in by the scraping loop: 0 = not attempted yet.
# rep() keeps this valid even when no bills are left to scrape.
df$done <- rep(0, nrow(df))

# Download the text of every outstanding bill and record the outcome in
# df$done: 1 on success, "error" on any failure (network problem, HTTP error
# status, or failure to write the file).

# Make sure the destination exists before the first write.
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)

# seq_len() yields an empty sequence when there is nothing left to scrape.
for (i in seq_len(nrow(df))) {
  bill_id <- df$a[i]
  out <- tryCatch({
    bill <- GET(
      paste0("https://api.govinfo.gov/packages/", bill_id, "/htm"),
      query = list(api_key = key)
    )
    # Turn HTTP 4xx/5xx responses into R errors so that an error page is
    # never saved as bill text and marked as done.
    stop_for_status(bill)
    bill2 <- rawToChar(bill$content)
    # file.path() inserts the separator that paste0(output_dir, ...) omitted,
    # so the file lands inside output_dir where the resume logic looks for it.
    # NOTE(review): write.table() adds a header line, a row name and quoting
    # around the text; kept as-is in case downstream code expects that
    # layout -- confirm whether writeLines() would be preferable.
    write.table(bill2, file = file.path(output_dir, paste0(bill_id, ".txt")))
    1
  },
  error = function(e) {
    # Report why the bill failed instead of swallowing the condition.
    message("Could not scrape ", bill_id, ": ", conditionMessage(e))
    "error"
  })
  df$done[i] <- out
}

# Save the per-bill scrape outcome as "<date>scraperesults.csv" inside
# output_dir. file.path() supplies the directory separator that
# paste0(output_dir, date, ...) left out, which previously produced a file
# next to output_dir whose name started with the directory's own name.
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
write.csv(df, file = file.path(output_dir, paste0(date, "scraperesults.csv")))